#pragma once
#include <cublas_v2.h>

namespace runtime {
namespace cuda {

// caffe_gpu_gemm: Caffe-style GEMM entry point. It offers a simpler interface
// to the gemm functions, with the limitation that the data has to be
// contiguous in memory.
//
// This header carries the declaration only; no definition of the template is
// visible here.
//
// NOTE(review): the parameter notes below assume the conventional GEMM
// contract, C = alpha * op(A) * op(B) + beta * C, with op(A) being M x K,
// op(B) being K x N and C being M x N. Storage order and whether the pointers
// must refer to device memory cannot be determined from this declaration --
// confirm against the definition.
//
//   Dtype           element type shared by the scalars and the matrices
//   TransA, TransB  cuBLAS operation selectors for A and B
//   M, N, K         problem dimensions
//   alpha, beta     scalar coefficients
//   A, B            read-only input buffers
//   C               output buffer (writable)
//   cublas_handle_  cuBLAS handle, taken by non-const reference
template <typename Dtype>
void caffe_gpu_gemm(
    const cublasOperation_t TransA,
    const cublasOperation_t TransB,
    const int M,
    const int N,
    const int K,
    const Dtype alpha,
    const Dtype* A,
    const Dtype* B,
    const Dtype beta,
    Dtype* C,
    cublasHandle_t& cublas_handle_);

}  // namespace cuda
}  // namespace runtime
